"""
Fuzz tests for security-critical functions using Hypothesis.

These tests verify that security functions don't crash on arbitrary input
and properly reject malicious inputs.
"""

import html
import json
import tempfile
from pathlib import Path
from urllib.parse import quote, unquote

import pytest
from hypothesis import assume, given, settings, strategies as st

from local_deep_research.security.path_validator import PathValidator
from local_deep_research.utilities.url_utils import normalize_url


# =============================================================================
# Custom Strategies for Security Testing
# =============================================================================


def path_traversal_payloads():
    """Generate various path traversal attack payloads."""
    basic = [
        "../",
        "..\\",
        "../" * 10,
        "..\\..\\",
        "....//",
        "..../",
        ".../",
        "..%2f",
        "..%5c",
        "%2e%2e/",
        "%2e%2e%2f",
        "..%252f",  # Double encoding
        "..%c0%af",  # Overlong UTF-8
        "..%c1%9c",
        "..;/",  # Parameter injection
        "..%00/",  # Null byte
        "..//",
        ".\\.\\",
        "..\\../",
    ]
    return st.sampled_from(basic)


def null_byte_payloads():
    """Generate null byte injection payloads."""
    return st.sampled_from(
        [
            "\x00",
            "%00",
            "\x00\x00",
            "file\x00.txt",
            "file%00.txt",
            "\x00/../etc/passwd",
        ]
    )


def xss_payloads():
    """Generate XSS attack payloads for HTML contexts."""
    return st.sampled_from(
        [
            "<script>alert(1)</script>",
            '"><script>alert(1)</script>',
            "javascript:alert(1)",
            "<img src=x onerror=alert(1)>",
            "<svg onload=alert(1)>",
            "'-alert(1)-'",
            '"><img src=x onerror=alert(1)>',
            "<body onload=alert(1)>",
            "{{constructor.constructor('alert(1)')()}}",
            "${alert(1)}",
            "#{alert(1)}",
            "<iframe src='javascript:alert(1)'>",
            "<math><maction xlink:href='javascript:alert(1)'>",
            "data:text/html,<script>alert(1)</script>",
        ]
    )


def dangerous_url_schemes():
    """Generate dangerous URL schemes."""
    return st.sampled_from(
        [
            "javascript:",
            "data:",
            "vbscript:",
            "file:",
            "ftp://",
            "gopher://",
            "jar:",
            "netdoc:",
        ]
    )


# =============================================================================
# PATH VALIDATOR FUZZ TESTS
# =============================================================================


class TestPathValidatorFuzzing:
    """Comprehensive fuzz tests for PathValidator security functions."""

    @given(user_input=st.text(max_size=1000))
    @settings(max_examples=200)
    def test_validate_safe_path_no_crash(self, user_input):
        """Test that validate_safe_path never crashes on arbitrary input."""
        with tempfile.TemporaryDirectory() as temp_dir:
            try:
                PathValidator.validate_safe_path(user_input, temp_dir)
            except ValueError:
                # Expected for invalid paths - this is correct behavior
                pass
            except Exception as e:
                # Any other exception type is a bug
                pytest.fail(
                    f"Unexpected exception type: {type(e).__name__}: {e}"
                )

    @given(
        user_input=st.text(
            alphabet=st.characters(
                blacklist_categories=("Cs",),  # Exclude surrogates
            ),
            max_size=500,
        )
    )
    @settings(max_examples=200)
    def test_validate_config_path_no_crash(self, user_input):
        """Test that validate_config_path never crashes on arbitrary input."""
        try:
            PathValidator.validate_config_path(user_input, "/tmp")
        except ValueError:
            # Expected for invalid paths
            pass
        except Exception as e:
            # Any other exception type is a bug
            pytest.fail(f"Unexpected exception type: {type(e).__name__}: {e}")

    @given(payload=path_traversal_payloads(), suffix=st.text(max_size=50))
    @settings(max_examples=100)
    def test_path_traversal_variants_blocked(self, payload, suffix):
        """Test that various path traversal encodings are blocked."""
        # Filter out problematic characters from suffix
        safe_suffix = "".join(c for c in suffix if c.isalnum() or c in "._-/")
        path = payload + safe_suffix

        with tempfile.TemporaryDirectory() as temp_dir:
            try:
                result = PathValidator.validate_safe_path(path, temp_dir)
                # If it didn't raise, verify path is still within base_dir
                if result:
                    resolved = result.resolve()
                    base_resolved = Path(temp_dir).resolve()
                    assert str(resolved).startswith(str(base_resolved)), (
                        f"Path escaped base_dir: {resolved}"
                    )
            except ValueError:
                # Expected - traversal was blocked
                pass

    @given(
        null_count=st.integers(min_value=1, max_value=10),
        position=st.sampled_from(["start", "middle", "end"]),
    )
    @settings(max_examples=100)
    def test_null_byte_injection_positions(self, null_count, position):
        """Test null byte injection at various positions."""
        null_bytes = "\x00" * null_count
        if position == "start":
            path = null_bytes + "file.json"
        elif position == "middle":
            path = "test" + null_bytes + "file.json"
        else:
            path = "testfile.json" + null_bytes

        try:
            PathValidator.validate_config_path(path, "/tmp")
        except ValueError:
            # Expected - null bytes should be rejected
            pass
        except Exception as e:
            pytest.fail(
                f"Unexpected exception for null byte input: {type(e).__name__}"
            )

    @given(
        prefix=st.sampled_from(
            ["etc", "proc", "sys", "dev", "var/log", "root", "home"]
        ),
        suffix=st.text(
            alphabet=st.characters(whitelist_categories=("L", "N")), max_size=20
        ),
    )
    @settings(max_examples=100)
    def test_restricted_system_paths_blocked(self, prefix, suffix):
        """Test that restricted system directories are blocked."""
        for path_variant in [
            f"/{prefix}/{suffix}",
            f"{prefix}/{suffix}",
            f"/{prefix}",
            f"../{prefix}/{suffix}",
        ]:
            try:
                PathValidator.validate_config_path(path_variant, "/tmp")
            except ValueError:
                # Expected - restricted paths should be blocked
                pass

    @given(
        depth=st.integers(min_value=1, max_value=50),
        segment=st.text(
            alphabet=st.characters(whitelist_categories=("L",)),
            min_size=1,
            max_size=10,
        ),
    )
    @settings(max_examples=50)
    def test_deep_path_nesting(self, depth, segment):
        """Test handling of deeply nested paths."""
        assume(len(segment) > 0)
        path = "/".join([segment] * depth)

        with tempfile.TemporaryDirectory() as temp_dir:
            try:
                PathValidator.validate_safe_path(path, temp_dir)
            except ValueError:
                pass
            except Exception as e:
                pytest.fail(
                    f"Unexpected exception for deep path: {type(e).__name__}"
                )

    @given(
        encoded=st.sampled_from(
            [
                "%2e%2e%2f",  # ../
                "%2e%2e/",  # ../
                "..%2f",  # ../
                "%2e%2e%5c",  # ..\
                "..%5c",  # ..\
                "%252e%252e%252f",  # Double encoded ../
                "..%c0%af",  # Overlong UTF-8 /
                "..%e0%80%af",  # Overlong UTF-8 /
                "..%c0%2f",  # Mixed encoding
            ]
        )
    )
    @settings(max_examples=50)
    def test_url_encoded_traversal_blocked(self, encoded):
        """Test URL-encoded path traversal attempts."""
        # Try both encoded and decoded versions
        for path in [encoded, unquote(encoded)]:
            with tempfile.TemporaryDirectory() as temp_dir:
                try:
                    result = PathValidator.validate_safe_path(path, temp_dir)
                    if result:
                        # Verify we didn't escape
                        resolved = result.resolve()
                        base_resolved = Path(temp_dir).resolve()
                        assert str(resolved).startswith(str(base_resolved))
                except ValueError:
                    pass


# =============================================================================
# URL NORMALIZATION FUZZ TESTS
# =============================================================================


class TestURLNormalizationFuzzing:
    """Fuzz tests for URL normalization functions."""

    @given(
        url=st.text(
            alphabet=st.characters(blacklist_categories=("Cs",)),
            min_size=1,
            max_size=500,
        )
    )
    @settings(max_examples=200)
    def test_normalize_url_no_crash(self, url):
        """Test that normalize_url never crashes on arbitrary input."""
        try:
            result = normalize_url(url)
            # Result should always be a string
            assert isinstance(result, str)
        except ValueError:
            # Empty URL raises ValueError - this is expected
            pass
        except Exception as e:
            pytest.fail(f"Unexpected exception: {type(e).__name__}: {e}")

    @given(scheme=dangerous_url_schemes(), rest=st.text(max_size=100))
    @settings(max_examples=100)
    def test_dangerous_schemes_handling(self, scheme, rest):
        """Test handling of dangerous URL schemes."""
        url = scheme + rest
        try:
            result = normalize_url(url)
            # Function should either reject or sanitize dangerous schemes
            # At minimum, it shouldn't crash
            assert isinstance(result, str)
        except ValueError:
            pass

    @given(
        hostname=st.sampled_from(
            [
                "localhost",
                "127.0.0.1",
                "0.0.0.0",
                "[::1]",
                "127.0.0.2",
                "127.1",
                "2130706433",  # 127.0.0.1 as decimal
                "0x7f000001",  # 127.0.0.1 as hex
                "0177.0.0.1",  # Octal
                "localhost.localdomain",
            ]
        ),
        port=st.integers(min_value=1, max_value=65535),
    )
    @settings(max_examples=100)
    def test_localhost_variants(self, hostname, port):
        """Test various localhost representations."""
        url = f"{hostname}:{port}"
        try:
            result = normalize_url(url)
            assert isinstance(result, str)
            # Should have a scheme
            assert "://" in result
        except ValueError:
            pass

    @given(
        host=st.text(
            alphabet=st.characters(whitelist_categories=("L", "N")),
            min_size=1,
            max_size=50,
        ),
        port=st.integers(min_value=1, max_value=65535),
    )
    @settings(max_examples=100)
    def test_host_port_combinations(self, host, port):
        """Test various host:port combinations."""
        url = f"{host}:{port}"
        try:
            result = normalize_url(url)
            assert isinstance(result, str)
        except ValueError:
            pass

    @given(
        ipv6=st.from_regex(r"\[[0-9a-fA-F:]+\]", fullmatch=True),
        port=st.integers(min_value=1, max_value=65535),
    )
    @settings(max_examples=50)
    def test_ipv6_addresses(self, ipv6, port):
        """Test IPv6 address handling."""
        url = f"{ipv6}:{port}"
        try:
            result = normalize_url(url)
            assert isinstance(result, str)
        except ValueError:
            pass


# =============================================================================
# HTML/PDF XSS FUZZ TESTS
# =============================================================================


class TestHTMLSanitizationFuzzing:
    """Fuzz tests for HTML generation and sanitization."""

    @given(payload=xss_payloads())
    @settings(max_examples=50)
    def test_xss_payloads_escaped_in_html(self, payload):
        """Test that XSS payloads with HTML tags are properly escaped."""
        # Test html.escape function behavior
        escaped = html.escape(payload)

        # HTML angle brackets should always be escaped
        # This prevents script injection via HTML tags
        if "<" in payload:
            assert "&lt;" in escaped, "< should be escaped to &lt;"
        if ">" in payload:
            assert "&gt;" in escaped, "> should be escaped to &gt;"

        # The escaped string should never contain raw angle brackets
        # (the original ones should all be escaped)
        original_lt_count = payload.count("<")
        original_gt_count = payload.count(">")
        escaped_lt_count = escaped.count("&lt;")
        escaped_gt_count = escaped.count("&gt;")

        assert original_lt_count == escaped_lt_count, "All < should be escaped"
        assert original_gt_count == escaped_gt_count, "All > should be escaped"

    @given(
        title=st.text(max_size=200),
        key=st.text(
            alphabet=st.characters(whitelist_categories=("L", "N")), max_size=50
        ),
        value=st.text(max_size=200),
    )
    @settings(max_examples=100)
    def test_metadata_escaping(self, title, key, value):
        """Test that metadata values are properly escaped for HTML."""
        # Simulate what PDFService._markdown_to_html does
        # These should be escaped before insertion
        escaped_title = html.escape(title)
        escaped_key = html.escape(key)
        escaped_value = html.escape(value)

        # Verify no unescaped angle brackets (except the tags themselves)
        assert title.count("<") == escaped_title.count("&lt;")
        assert title.count(">") == escaped_title.count("&gt;")

        # Verify the escaped values don't contain raw HTML special chars
        assert "<" not in escaped_title or "&lt;" in escaped_title
        assert '"' not in escaped_key or "&quot;" in escaped_key
        assert '"' not in escaped_value or "&quot;" in escaped_value

    @given(
        content=st.text(
            alphabet=st.characters(blacklist_categories=("Cs",)), max_size=1000
        )
    )
    @settings(max_examples=100)
    def test_markdown_content_handling(self, content):
        """Test markdown content doesn't cause crashes."""
        # Import here to avoid import errors if markdown not installed
        try:
            import markdown

            md = markdown.Markdown(
                extensions=["tables", "fenced_code", "footnotes"]
            )
            # Should not crash on arbitrary input
            result = md.convert(content)
            assert isinstance(result, str)
        except ImportError:
            pytest.skip("markdown not installed")
        except Exception as e:
            # Some parsing errors are acceptable, but not crashes
            if "crash" in str(type(e).__name__).lower():
                pytest.fail(f"Markdown parser crashed: {e}")


# =============================================================================
# JSON PARSING ROBUSTNESS FUZZ TESTS
# =============================================================================


class TestJSONParsingFuzzing:
    """Fuzz tests for JSON parsing robustness."""

    @given(
        data=st.recursive(
            st.none()
            | st.booleans()
            | st.integers()
            | st.floats(allow_nan=False, allow_infinity=False)
            | st.text(max_size=100),
            lambda children: st.lists(children, max_size=10)
            | st.dictionaries(st.text(max_size=20), children, max_size=10),
            max_leaves=50,
        )
    )
    @settings(max_examples=100)
    def test_json_roundtrip(self, data):
        """Test JSON serialization/deserialization doesn't crash."""
        try:
            serialized = json.dumps(data)
            deserialized = json.loads(serialized)
            # Should roundtrip correctly
            assert json.dumps(deserialized) == serialized
        except (TypeError, ValueError):
            # Some Python objects can't be JSON serialized
            pass

    @given(malformed=st.text(max_size=500))
    @settings(max_examples=100)
    def test_malformed_json_handling(self, malformed):
        """Test that malformed JSON raises JSONDecodeError, not crashes."""
        try:
            json.loads(malformed)
        except json.JSONDecodeError:
            # Expected for malformed JSON
            pass
        except Exception as e:
            # Any other exception is unexpected
            if not isinstance(e, (ValueError, TypeError)):
                pytest.fail(
                    f"Unexpected exception parsing JSON: {type(e).__name__}"
                )

    @given(depth=st.integers(min_value=1, max_value=100))
    @settings(max_examples=50)
    def test_deeply_nested_json(self, depth):
        """Test handling of deeply nested JSON structures."""
        # Build nested structure
        nested = {"value": None}
        for _ in range(depth):
            nested = {"nested": nested}

        try:
            serialized = json.dumps(nested)
            json.loads(serialized)
        except RecursionError:
            # Very deep nesting might hit recursion limit - that's acceptable
            pass
        except Exception as e:
            pytest.fail(
                f"Unexpected exception for nested JSON: {type(e).__name__}"
            )

    @given(
        size=st.integers(min_value=1, max_value=10000),
        char=st.characters(whitelist_categories=("L", "N")),
    )
    @settings(max_examples=50)
    def test_large_json_strings(self, size, char):
        """Test handling of large JSON string values."""
        large_string = char * size
        data = {"large": large_string}

        try:
            serialized = json.dumps(data)
            result = json.loads(serialized)
            assert result["large"] == large_string
        except MemoryError:
            # Very large strings might cause memory issues - acceptable
            pass


# =============================================================================
# SEARCH QUERY FUZZ TESTS
# =============================================================================


class TestSearchQueryFuzzing:
    """Fuzz tests for search query handling."""

    @given(
        query=st.text(
            alphabet=st.characters(blacklist_categories=("Cs",)), max_size=500
        )
    )
    @settings(max_examples=100)
    def test_search_query_special_chars(self, query):
        """Test that search queries with special characters don't crash."""
        # Test common query operations
        try:
            # Normalize whitespace
            normalized = " ".join(query.split())

            # URL encode for API calls
            encoded = quote(query, safe="")

            # Should produce valid strings
            assert isinstance(normalized, str)
            assert isinstance(encoded, str)
        except Exception as e:
            pytest.fail(f"Query processing failed: {type(e).__name__}: {e}")

    @given(
        operator=st.sampled_from(
            [
                "AND",
                "OR",
                "NOT",
                "NEAR",
                "ADJ",
                "&&",
                "||",
                "!",
                "+",
                "-",
                "~",
                "*",
                "?",
                '"',
                "(",
                ")",
                "[",
                "]",
                "{",
                "}",
                ":",
                "^",
            ]
        ),
        term=st.text(
            alphabet=st.characters(whitelist_categories=("L",)),
            min_size=1,
            max_size=20,
        ),
    )
    @settings(max_examples=100)
    def test_search_operators_handling(self, operator, term):
        """Test handling of search operators in queries."""
        queries = [
            f"{operator}{term}",
            f"{term}{operator}",
            f"{term} {operator} {term}",
            f'"{operator}"',
            f"({term} {operator} {term})",
        ]

        for query in queries:
            try:
                # Basic sanitization - should not crash
                sanitized = query.replace("\x00", "")
                normalized = " ".join(sanitized.split())
                assert isinstance(normalized, str)
            except Exception as e:
                pytest.fail(
                    f"Search operator handling failed: {type(e).__name__}"
                )
